615d3043b35164888b1125ab4a677e1163c00938,src/edu/stanford/nlp/pipeline/ProtobufAnnotationSerializer.java,ProtobufAnnotationSerializer,fromProto,#CoreNLPProtos.Document#,669
Before Change
CoreMap map = fromProtoNoTokens(sentence);
if (!tokens.isEmpty() && sentence.hasTokenOffsetBegin() && sentence.hasTokenOffsetEnd()) {
// Set tokens for sentence
map.set(TokensAnnotation.class, tokens.subList(sentence.getTokenOffsetBegin(), sentence.getTokenOffsetEnd()));
// Set sentence index + token index + paragraph index
for (int i = sentence.getTokenOffsetBegin(); i < sentence.getTokenOffsetEnd(); ++i) {
tokens.get(i).setSentIndex(sentIndex);
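The Before Change path feeds the proto's token offsets straight into List.subList, so a serialized sentence whose offsets point past the end of the recovered token list fails with an IndexOutOfBoundsException. Below is a minimal standalone sketch of that failure mode; the token list and offset values are hypothetical and only stand in for the proto-supplied data:

import java.util.Arrays;
import java.util.List;

public class UnclampedSubListSketch {
  public static void main(String[] args) {
    List<String> tokens = Arrays.asList("The", "quick", "fox");  // hypothetical recovered tokens
    int tokenOffsetBegin = 0;
    int tokenOffsetEnd = 5;                                      // stale offset past tokens.size()
    try {
      // Mirrors the unguarded call above; subList requires 0 <= from <= to <= size().
      List<String> sentenceTokens = tokens.subList(tokenOffsetBegin, tokenOffsetEnd);
      System.out.println(sentenceTokens);
    } catch (IndexOutOfBoundsException e) {
      System.out.println("subList failed: " + e);
    }
  }
}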
After Change
Annotation ann = new Annotation(proto.getText());
// Add tokens
List<CoreLabel> tokens = new ArrayList<CoreLabel>();
if (proto.getSentenceCount() > 0) {
// Populate the tokens from the sentence
for (CoreNLPProtos.Sentence sentence : proto.getSentenceList()) {
for (CoreNLPProtos.Token token : sentence.getTokenList()) {
CoreLabel coreLabel = fromProto(token);
// Set docid
if (proto.hasDocID()) { coreLabel.setDocID(proto.getDocID()); }
tokens.add(coreLabel);
}
}
} else if (proto.getSentencelessTokenCount() > 0) {
// Eek -- no sentences. Try to recover tokens directly
if (proto.getSentencelessTokenCount() > 0) {
for (CoreNLPProtos.Token token : proto.getSentencelessTokenList()) {
CoreLabel coreLabel = fromProto(token);
// Set docid
if (proto.hasDocID()) { coreLabel.setDocID(proto.getDocID()); }
tokens.add(coreLabel);
}
}
}
if (!tokens.isEmpty()) { ann.set(TokensAnnotation.class, tokens); }
// Add sentences
List<CoreMap> sentences = new ArrayList<CoreMap>(proto.getSentenceCount());
for (int sentIndex = 0; sentIndex < proto.getSentenceCount(); ++sentIndex) {
CoreNLPProtos.Sentence sentence = proto.getSentence(sentIndex);
CoreMap map = fromProtoNoTokens(sentence);
if (!tokens.isEmpty() && sentence.hasTokenOffsetBegin() && sentence.hasTokenOffsetEnd()) {
// Set tokens for sentence
int tokenBegin = Math.min(sentence.getTokenOffsetBegin(), tokens.size());
int tokenEnd = Math.min(sentence.getTokenOffsetEnd(), tokens.size());
map.set(TokensAnnotation.class, tokens.subList(tokenBegin, tokenEnd));
// Set sentence index + token index + paragraph index
for (int i = tokenBegin; i < tokenEnd; ++i) {
tokens.get(i).setSentIndex(sentIndex);
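The After Change path clamps both offsets to tokens.size() before calling subList, so out-of-range offsets degrade to a shorter (possibly empty) token span instead of throwing. A minimal sketch of that clamping pattern, again with hypothetical data rather than values from the commit:

import java.util.Arrays;
import java.util.List;

public class ClampedSubListSketch {
  public static void main(String[] args) {
    List<String> tokens = Arrays.asList("The", "quick", "fox");  // hypothetical recovered tokens
    int tokenOffsetBegin = 0;
    int tokenOffsetEnd = 5;                                      // stale offset past tokens.size()
    // Clamp both ends to the actual list size, as the After Change code does with Math.min.
    int tokenBegin = Math.min(tokenOffsetBegin, tokens.size());
    int tokenEnd = Math.min(tokenOffsetEnd, tokens.size());
    System.out.println(tokens.subList(tokenBegin, tokenEnd));    // prints [The, quick, fox]
  }
}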